library(phyloseq)
library(decontam)
library(reltools)
library(minpack.lm)
library(devtools)
## Loading required package: usethis
library(tyRa)
##
## Attaching package: 'tyRa'
## The following objects are masked from 'package:reltools':
##
## fit_sncm, plot_sncm_fit
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.1.2
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
library(Biostrings)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:phyloseq':
##
## distance
## Loading required package: XVector
## Loading required package: GenomeInfoDb
##
## Attaching package: 'Biostrings'
## The following objects are masked from 'package:Hmisc':
##
## mask, translate
## The following object is masked from 'package:base':
##
## strsplit
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:Biostrings':
##
## collapse, intersect, setdiff, setequal, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following object is masked from 'package:XVector':
##
## slice
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:Hmisc':
##
## src, summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.6 ✔ purrr 1.0.1
## ✔ tidyr 1.1.4 ✔ stringr 1.4.0
## ✔ readr 2.1.3 ✔ forcats 0.5.1
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'purrr' was built under R version 4.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks Biostrings::collapse(), IRanges::collapse()
## ✖ dplyr::combine() masks BiocGenerics::combine()
## ✖ purrr::compact() masks XVector::compact()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks S4Vectors::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ BiocGenerics::Position() masks ggplot2::Position(), base::Position()
## ✖ purrr::reduce() masks IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ dplyr::slice() masks XVector::slice(), IRanges::slice()
## ✖ dplyr::src() masks Hmisc::src()
## ✖ dplyr::summarize() masks Hmisc::summarize()
library(vegan)
## Loading required package: permute
##
## Attaching package: 'permute'
##
## The following object is masked from 'package:devtools':
##
## check
##
## This is vegan 2.5-7
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.1.2
library(readxl)
library(tibble)
library(ggtext)
## Warning: package 'ggtext' was built under R version 4.1.2
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 4.1.2
library(microbiome)
##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2020 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
##
## Attaching package: 'microbiome'
##
## The following object is masked from 'package:vegan':
##
## diversity
##
## The following object is masked from 'package:Biostrings':
##
## coverage
##
## The following objects are masked from 'package:IRanges':
##
## coverage, transform
##
## The following object is masked from 'package:S4Vectors':
##
## transform
##
## The following object is masked from 'package:ggplot2':
##
## alpha
##
## The following object is masked from 'package:base':
##
## transform
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
fungi.colors <- c("#c6dbef","#9ecae1","#6baed6","#3182bd","#08519c",
"#c7e9c0", "#a1d99b", "#74c476", "#41ab5d", "#238b45", "#005a32",
"#fdd0a2", "#fdae6b", "#fd8d3c", "#f16913", "#d94801", "#8c2d04",
"#dadaeb", "#bcbddc", "#9e9ac8", "#807dba", "#6a51a3", "#4a1486",
"#fcbba1", "#fc9272", "#fb6a4a", "#ef3b2c", "#cb181d", "#99000d",
"#d9d9d9", "#bdbdbd", "#969696", "#737373", "#525252", "#252525")
#metadata
samp_dat_fungi <- read.csv("/Users/lau/Desktop/PLPA6820/2023.02.26Gittor/phyloseq_input/metadata_libr.prepall02.01.22.csv", na.strings = "NA")
rownames(samp_dat_fungi) <- samp_dat_fungi$Sample #row names must match OTU table headers to be able to be readed
SAMP.fungi <- phyloseq::sample_data(samp_dat_fungi)
# OTU table
otu_fungi <- read.csv("/Users/lau/Desktop/PLPA6820/2023.02.26Gittor/phyloseq_input/otu_table_ITS_UNOISE_R1.csv")
rownames(otu_fungi) <- otu_fungi$OTU
otu_fungi <- otu_fungi[,-1]
OTU.fungi <- phyloseq::otu_table(otu_fungi, taxa_are_rows = TRUE)
colnames(otu_fungi)
## [1] "SoilAT2R1W0" "SoilAT3R1W0" "SoilBT5R1W0" "SoilAT5R1W0" "SoilAT4R1W0"
## [6] "SoilBT3R1W0" "SoilBT1R1W0" "SoilBT4R1W0" "SoilAT1R1W0" "SoilBT2R1W0"
## [11] "SoilAT4R2W0" "SoilAT5R2W0" "SoilBT2R2W0" "SoilBT1R2W0" "SoilAT1R2W0"
## [16] "SoilBT3R2W0" "SoilAT3R2W0" "SoilAT2R2W0" "SoilBT5R2W0" "SoilBT4R2W0"
## [21] "SoilBT2R3W0" "SoilAT3R3W0" "SoilAT5R3W0" "SoilBT5R3W0" "SoilBT1R3W0"
## [26] "SoilAT4R3W0" "SoilAT1R3W0" "SoilBT3R3W0" "SoilBT4R3W0" "SoilAT2R3W0"
## [31] "SoilBT5R4W0" "SoilAT1R4W0" "SoilAT2R4W0" "SoilAT5R4W0" "SoilBT3R4W0"
## [36] "SoilBT4R4W0" "SoilBT1R4W0" "SoilAT3R4W0" "SoilAT4R4W0" "SoilBT2R4W0"
## [41] "SoilAT1R5W0" "SoilBT1R5W0" "SoilBT4R5W0" "SoilAT5R5W0" "SoilBT2R5W0"
## [46] "SoilAT4R5W0" "SoilAT2R5W0" "SoilAT3R5W0" "SoilBT3R5W0" "SoilBT5R5W0"
## [51] "SoilBT4R6W0" "SoilAT3R6W0" "SoilAT1R6W0" "SoilAT4R6W0" "SoilBT3R6W0"
## [56] "SoilAT2R6W0" "SoilBT5R6W0" "SoilAT5R6W0" "SoilBT1R6W0" "SoilBT2R6W0"
## [61] "SoilAT2R1W2" "SoilAT3R1W2" "SoilBT5R1W2" "SoilAT5R1W2" "SoilAT4R1W2"
## [66] "SoilBT3R1W2" "SoilBT1R1W2" "SoilBT4R1W2" "SoilAT1R1W2" "SoilBT2R1W2"
## [71] "SoilAT4R2W2" "SoilAT5R2W2" "SoilBT2R2W2" "SoilBT1R2W2" "SoilAT1R2W2"
## [76] "SoilBT3R2W2" "SoilAT3R2W2" "SoilAT2R2W2" "SoilBT5R2W2" "SoilBT4R2W2"
## [81] "SoilBT2R3W2" "SoilAT3R3W2" "SoilAT5R3W2" "SoilBT5R3W2" "SoilBT1R3W2"
## [86] "SoilAT4R3W2" "SoilAT1R3W2" "SoilBT3R3W2" "SoilBT4R3W2" "SoilAT2R3W2"
## [91] "SoilBT5R4W2" "SoilAT1R4W2" "SoilAT2R4W2" "SoilAT5R4W2" "NCP1"
## [96] "PC1" "SoilBT3R4W2" "SoilBT4R4W2" "NEC1" "SoilBT1R4W2"
## [101] "SoilAT3R4W2" "SoilAT4R4W2" "SoilBT2R4W2" "SoilAT1R5W2" "SoilBT1R5W2"
## [106] "SoilBT4R5W2" "SoilAT5R5W2" "SoilBT2R5W2" "SoilAT4R5W2" "SoilAT2R5W2"
## [111] "SoilAT3R5W2" "SoilBT3R5W2" "SoilBT5R5W2" "SoilBT4R6W2" "SoilAT3R6W2"
## [116] "SoilAT1R6W2" "SoilAT4R6W2" "SoilBT3R6W2" "SoilAT2R6W2" "SoilBT5R6W2"
## [121] "SoilAT5R6W2" "SoilBT1R6W2" "SoilBT2R6W2" "SoilAT2R1W5" "SoilAT3R1W5"
## [126] "SoilBT5R1W5" "SoilAT5R1W5" "SoilAT4R1W5" "SoilBT3R1W5" "SoilBT1R1W5"
## [131] "SoilBT4R1W5" "SoilAT1R1W5" "SoilBT2R1W5" "SoilAT4R2W5" "SoilAT5R2W5"
## [136] "SoilBT2R2W5" "SoilBT1R2W5" "SoilAT1R2W5" "SoilBT3R2W5" "SoilAT3R2W5"
## [141] "SoilAT2R2W5" "SoilBT5R2W5" "SoilBT4R2W5" "SoilBT2R3W5" "SoilAT3R3W5"
## [146] "SoilAT5R3W5" "NEC2" "SoilBT5R3W5" "SoilBT1R3W5" "SoilAT4R3W5"
## [151] "SoilAT1R3W5" "SoilBT3R3W5" "SoilBT4R3W5" "SoilAT2R3W5" "SoilBT5R4W5"
## [156] "SoilAT1R4W5" "SoilAT2R4W5" "SoilAT5R4W5" "SoilBT3R4W5" "SoilBT4R4W5"
## [161] "SoilBT1R4W5" "SoilAT3R4W5" "SoilAT4R4W5" "SoilBT2R4W5" "SoilAT1R5W5"
## [166] "SoilBT1R5W5" "SoilBT4R5W5" "SoilAT5R5W5" "SoilBT2R5W5" "SoilAT4R5W5"
## [171] "NEC3" "SoilAT2R5W5" "SoilAT3R5W5" "SoilBT3R5W5" "SoilBT5R5W5"
## [176] "SoilBT4R6W5" "SoilAT3R6W5" "SoilAT1R6W5" "SoilAT4R6W5" "SoilBT3R6W5"
## [181] "SoilAT2R6W5" "SoilBT5R6W5" "SoilAT5R6W5" "SoilBT1R6W5" "SoilBT2R6W5"
## [186] "SoilAT2R1W7" "SoilAT3R1W7" "SoilBT5R1W7" "SoilAT5R1W7" "SoilAT4R1W7"
## [191] "NCP2" "PC2" "SoilBT3R1W7" "SoilBT1R1W7" "SoilBT4R1W7"
## [196] "SoilAT1R1W7" "NEC4" "SoilBT2R1W7" "SoilAT4R2W7" "SoilAT5R2W7"
## [201] "SoilBT2R2W7" "SoilBT1R2W7" "SoilAT1R2W7" "SoilBT3R2W7" "SoilAT3R2W7"
## [206] "SoilAT2R2W7" "SoilBT5R2W7" "SoilBT4R2W7" "SoilBT2R3W7" "SoilAT3R3W7"
## [211] "SoilAT5R3W7" "SoilBT5R3W7" "SoilBT1R3W7" "SoilAT4R3W7" "SoilAT1R3W7"
## [216] "SoilBT3R3W7" "SoilBT4R3W7" "SoilAT2R3W7" "SoilBT5R4W7" "NEC5"
## [221] "SoilAT1R4W7" "SoilAT2R4W7" "SoilAT5R4W7" "SoilBT3R4W7" "SoilBT4R4W7"
## [226] "SoilBT1R4W7" "SoilAT3R4W7" "SoilAT4R4W7" "SoilBT2R4W7" "SoilAT1R5W7"
## [231] "SoilBT1R5W7" "SoilBT4R5W7" "SoilAT5R5W7" "SoilBT2R5W7" "SoilAT4R5W7"
## [236] "SoilAT2R5W7" "SoilAT3R5W7" "SoilBT3R5W7" "SoilBT5R5W7" "SoilBT4R6W7"
## [241] "SoilAT3R6W7" "SoilAT1R6W7" "SoilAT4R6W7" "SoilBT3R6W7" "NEC6"
## [246] "SoilAT2R6W7" "SoilBT5R6W7" "SoilAT5R6W7" "SoilBT1R6W7" "SoilBT2R6W7"
## [251] "SoilAT2R1W9" "SoilAT3R1W9" "SoilBT5R1W9" "SoilAT5R1W9" "SoilAT4R1W9"
## [256] "SoilBT3R1W9" "SoilBT1R1W9" "SoilBT4R1W9" "SoilAT1R1W9" "SoilBT2R1W9"
## [261] "SoilAT4R2W9" "SoilAT5R2W9" "SoilBT2R2W9" "SoilBT1R2W9" "SoilAT1R2W9"
## [266] "SoilBT3R2W9" "SoilAT3R2W9" "SoilAT2R2W9" "NEC7" "SoilBT5R2W9"
## [271] "SoilBT4R2W9" "SoilBT2R3W9" "SoilAT3R3W9" "SoilAT5R3W9" "SoilBT5R3W9"
## [276] "SoilBT1R3W9" "SoilAT4R3W9" "SoilAT1R3W9" "SoilBT3R3W9" "SoilBT4R3W9"
## [281] "SoilAT2R3W9" "SoilBT5R4W9" "SoilAT1R4W9" "SoilAT2R4W9" "SoilAT5R4W9"
## [286] "SoilBT3R4W9" "NCP3" "PC3" "SoilBT4R4W9" "SoilBT1R4W9"
## [291] "SoilAT3R4W9" "SoilAT4R4W9" "SoilBT2R4W9" "SoilAT1R5W9" "NEC8"
## [296] "SoilBT1R5W9" "SoilBT4R5W9" "SoilAT5R5W9" "SoilBT2R5W9" "SoilAT4R5W9"
## [301] "SoilAT2R5W9" "SoilAT3R5W9" "SoilBT3R5W9" "SoilBT5R5W9" "SoilBT4R6W9"
## [306] "SoilAT3R6W9" "SoilAT1R6W9" "SoilAT4R6W9" "SoilBT3R6W9" "SoilAT2R6W9"
## [311] "SoilBT5R6W9" "SoilAT5R6W9" "SoilBT1R6W9" "SoilBT2R6W9" "NEC9"
## [316] "NCP4" "NCP5" "NCP6" "NCP7" "NCP8"
## [321] "NCP9" "NCP10" "NCP11" "PC4"
# Taxonomy
unite_taxonomy <-read.csv("/Users/lau/Desktop/PLPA6820/2023.02.26Gittor/phyloseq_input/taxfungi_R1_UNITE.csv",
header = TRUE,
row.names = 1)
any(unite_taxonomy$Kingdom == "unidentified")
## [1] TRUE
nrow(unite_taxonomy[unite_taxonomy$Kingdom == "unidentified", ])
## [1] 2772
unite_taxonomy[unite_taxonomy$Kingdom == "unidentified", ]
unite_taxonomy %>% dplyr::filter(unite_taxonomy$Kingdom == "unidentified")
unite_taxonomy <- subset(unite_taxonomy, Kingdom %in% c("Fungi", "Mocki"))
dim(unite_taxonomy)
## [1] 3140 8
# Removing bacteria and other non-target taxa ----------------------------------------------------------------------------
head(unite_taxonomy)
levels(as.factor(unite_taxonomy$Kingdom))
## [1] "Fungi" "Mocki"
levels(as.factor(unite_taxonomy$Class))
## [1] "Agaricomycetes" "Agaricostilbomycetes"
## [3] "Archaeorhizomycetes" "Archaeosporomycetes"
## [5] "Atractiellomycetes" "Basidiobolomycetes"
## [7] "Blastocladiomycetes" "Calcarisporiellomycetes"
## [9] "Chytridiomycetes" "Cystobasidiomycetes"
## [11] "Dacrymycetes" "Dothideomycetes"
## [13] "Endogonomycetes" "Entorrhizomycetes"
## [15] "Eurotiomycetes" "Exobasidiomycetes"
## [17] "Geminibasidiomycetes" "Glomeromycetes"
## [19] "GS13" "GS14"
## [21] "Kickxellomycetes" "Laboulbeniomycetes"
## [23] "Lecanoromycetes" "Leotiomycetes"
## [25] "Lobulomycetes" "Malasseziomycetes"
## [27] "Microbotryomycetes" "Mockomycetes"
## [29] "Mortierellomycetes" "Mucoromycetes"
## [31] "Mucoromycotina_cls_Incertae_sedis" "Olpidiomycetes"
## [33] "Orbiliomycetes" "Paraglomeromycetes"
## [35] "Pezizomycetes" "Pezizomycotina_cls_Incertae_sedis"
## [37] "Pucciniomycetes" "Rhizophlyctidomycetes"
## [39] "Rhizophydiomycetes" "Rozellomycotina_cls_Incertae_sedis"
## [41] "Saccharomycetes" "Sanchytriomycetes"
## [43] "Sordariomycetes" "Spizellomycetes"
## [45] "Synchytriomycetes" "Taphrinomycetes"
## [47] "Tremellomycetes" "Umbelopsidomycetes"
## [49] "unidentified" "Ustilaginomycetes"
## [51] "Wallemiomycetes" "Zoopagomycetes"
unite_taxonomy$OTU <- rownames(unite_taxonomy)
TAX.fungi.unite <- phyloseq::tax_table(as.matrix(unite_taxonomy))
FASTA.fungi <- readDNAStringSet("/Users/lau/Desktop/PLPA6820/2023.02.26Gittor/phyloseq_input/otus_R1.fasta", seek.first.rec=TRUE, use.names=TRUE)
physeq_fungi_nonfilt <- phyloseq::phyloseq(OTU.fungi, TAX.fungi.unite, FASTA.fungi, SAMP.fungi)
#this will decontaminate the data by comparing to those microbes also found in the control control samples
physeq_fungi_nonfilt@sam_data$Sample_or_Control <- ifelse(physeq_fungi_nonfilt@sam_data$Isolate.Code %in% c("NEC", "NCP"), "Control Sample", "True Sample")
sample_data(physeq_fungi_nonfilt)$is.neg <- sample_data(physeq_fungi_nonfilt)$Sample_or_Control == "Control Sample"
contamdf.prev <- isContaminant(physeq_fungi_nonfilt, method="prevalence", neg="is.neg", threshold = 0.1, normalize = TRUE)
badTaxa <- rownames(contamdf.prev[contamdf.prev$contaminant == TRUE,])
print(badTaxa)
## [1] "FOTU_3391" "FOTU_5159" "FOTU_571"
ps.pa <- transform_sample_counts(physeq_fungi_nonfilt, function(abund) 1*(abund>0))
ps.pa.neg <- prune_samples(sample_data(ps.pa)$Sample_or_Control == "Control Sample", ps.pa)
ps.pa.pos <- prune_samples(sample_data(ps.pa)$Sample_or_Control == "True Sample", ps.pa)
# Make data.frame of prevalence in positive and negative samples
df.pa <- data.frame(pa.pos=taxa_sums(ps.pa.pos), pa.neg=taxa_sums(ps.pa.neg),
contaminant=contamdf.prev$contaminant)
#chart name decontaminate
ggplot(data=df.pa, aes(x=pa.neg, y=pa.pos, color=contaminant)) + geom_point() +
xlab("Prevalence (Negative Controls)") + ylab("Prevalence (True Samples)")
goodTaxa <- setdiff(taxa_names(physeq_fungi_nonfilt), badTaxa)
fungi_sub_no_bad <- prune_taxa(goodTaxa, physeq_fungi_nonfilt)
# analyse the positive controls Mock sequences
# Sanity check - we only want OTUs that are Fungi
unique(fungi_sub_no_bad@tax_table@.Data[,1])# We only want Kingdom Fungi
## [1] "Fungi" "Mocki"
fungi.obj1 <- fungi_sub_no_bad %>%
subset_taxa(Kingdom == "Fungi") %>%
subset_samples(!Isolate.Code %in% c("NEC", "NCP", "PC")) %>%
phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE) # remove taxa with zero reads (i.e., those not present in objective 1)
unique(fungi.obj1@tax_table@.Data[,1])# We only want Kingdom Fungi
## [1] "Fungi"
sort(data.frame(sample_sums(fungi.obj1))[,1], decreasing = TRUE)
## [1] 243553 192718 189876 152572 152465 148369 143627 140231 138660 137176
## [11] 132323 131912 128118 127059 125352 125047 124503 123537 123308 122402
## [21] 121638 120076 119148 118824 118168 117892 115718 114135 113493 112136
## [31] 111406 110409 108759 108409 106851 106630 105862 105345 105098 104294
## [41] 103694 103245 102640 101878 101714 101433 101414 100978 100530 100296
## [51] 99760 99422 99186 98894 98743 98339 98023 97782 97108 96608
## [61] 96257 95957 95582 95254 95112 94630 94416 94098 93520 93149
## [71] 92750 92258 91803 91602 91203 90828 90755 90415 89902 89804
## [81] 89541 89239 88787 88772 87635 86988 86451 86240 86220 85996
## [91] 85895 85804 85656 85644 85557 85539 85504 85332 84959 83820
## [101] 83712 83206 83162 83135 82554 82527 82506 81628 81602 81258
## [111] 81092 80687 80531 80522 80521 80089 80089 80034 79950 79841
## [121] 79479 79347 78920 78830 78270 78203 78076 77898 77775 77379
## [131] 77371 77316 77210 77059 77015 76860 76354 75130 74755 74689
## [141] 74675 74560 74032 73803 73657 72934 72915 72890 72748 72304
## [151] 72263 72139 71952 71847 71650 70939 70676 70603 70382 70381
## [161] 70375 69646 69628 69414 69277 69174 68927 68897 68694 68569
## [171] 68239 67951 67898 67431 66996 66873 66858 66745 66724 66598
## [181] 66189 66130 65870 65605 65498 65235 65005 64903 64487 64455
## [191] 64148 63538 63209 62835 62658 62417 62392 62254 61498 61430
## [201] 61207 61063 60681 60629 60360 60217 60128 59707 59696 59204
## [211] 59183 59092 58889 58885 58815 58441 58290 58101 57747 57715
## [221] 57628 57549 57545 57507 57410 57288 57174 57018 56788 56743
## [231] 56651 56602 56415 55951 55567 55458 55030 54784 54057 54052
## [241] 54037 53875 53735 53593 53386 53309 53128 52842 52786 52492
## [251] 52307 52259 52138 51991 51852 51816 51613 51278 50376 50178
## [261] 49858 49740 49124 49074 48498 48266 47863 47748 47660 47554
## [271] 47315 47225 47215 46871 46858 46463 45742 45275 44661 43966
## [281] 42468 41632 41621 40680 40236 39338 39035 37994 35673 34587
## [291] 34068 33893 32864 32181 30884 30826 26795 25555 284
# we are going to trash all the samples below 5,000. to make sure we take the best samples.
## FILTER OUT SAMPLES BELOW 5000 reads
fungi.obj1_5000reads <- prune_samples(sample_sums(fungi.obj1) > 5000, fungi.obj1) %>%
phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE)
sum(taxa_sums(fungi.obj1_5000reads)) # 22,957,826
## [1] 22957826
#Final total for fungi - 22,957,826 reads across 298 (you got this number from:"fungi.obj1_5000reads") samples
mean(sample_sums(fungi.obj1_5000reads)) # 77,039
## [1] 77039.68
median(sample_sums(fungi.obj1_5000reads)) # 72,526
## [1] 72526
# Save an object to a file
saveRDS(fungi.obj1_5000reads, file = "Fungi_peanut_soil_nonorm_041723.rds")
# Restore the object. you can start from here!!
fungi.no.norm <- readRDS(file = "Fungi_peanut_soil_nonorm_041723.rds")
## Rarefaction analysis
sam.data <- data.frame(fungi.no.norm@sam_data)
fOTU.table <- fungi.no.norm@otu_table
S <- specnumber(t(fOTU.table)) # observed number of species
raremax <- min(rowSums(t(fOTU.table)))
Srare <- rarefy(t(fOTU.table), raremax)
#chart name rarefaction_1
plot(S, Srare, xlab = "Observed No. of Species", ylab = "Rarefied No. of Species")
abline(0, 1)
rare.fun <- rarecurve(t(fOTU.table), step = 1000, sample = raremax, col = "blue", cex = 0.6)
fungi.rare.curve.extract <- NULL
for(i in 1:length(rare.fun)){
sample.200 <- data.frame(rare.spec = rare.fun[[i]])
sample.200$read_depth <- attr(rare.fun[[i]], "Subsample")
sample.200$Sample <- rownames(t(fOTU.table[,i]))
fungi.rare.curve.extract <- rbind.data.frame(fungi.rare.curve.extract, sample.200)
}
fungi.rare.curve.extract2 <- left_join(sam.data, fungi.rare.curve.extract, by = "Sample")
fungi.rare <- ggplot(fungi.rare.curve.extract2, aes(x = read_depth, y = rare.spec, group = Sample)) +
#geom_point() +
geom_line() +
xlab("Reads") +
ylab("Number of OTUs") +
theme_classic() +
geom_vline(xintercept = 72526, linetype = "dashed") +
ggtitle("Fungi")
fungi.rare
MGS <- phyloseq_to_metagenomeSeq(fungi.no.norm)
## Loading required namespace: metagenomeSeq
p <- metagenomeSeq::cumNormStatFast(MGS)
## Default value being used.
MGS <- metagenomeSeq::cumNorm(MGS, p =p)
metagenomeSeq::normFactors(MGS) # exports the normalized factors for each sample
## SoilAT2R1W0 SoilAT3R1W0 SoilBT5R1W0 SoilAT5R1W0 SoilBT3R1W0 SoilBT1R1W0
## 674 1900 1953 2448 1938 2363
## SoilBT4R1W0 SoilAT1R1W0 SoilBT2R1W0 SoilAT4R2W0 SoilAT5R2W0 SoilBT2R2W0
## 1219 1620 1213 1162 1369 2736
## SoilBT1R2W0 SoilAT1R2W0 SoilBT3R2W0 SoilAT3R2W0 SoilAT2R2W0 SoilBT5R2W0
## 2172 2966 1890 912 880 1504
## SoilBT4R2W0 SoilBT2R3W0 SoilAT3R3W0 SoilAT5R3W0 SoilBT5R3W0 SoilBT1R3W0
## 2143 1603 974 1147 871 2329
## SoilAT4R3W0 SoilAT1R3W0 SoilBT3R3W0 SoilBT4R3W0 SoilAT2R3W0 SoilBT5R4W0
## 2491 2284 2255 939 761 2069
## SoilAT1R4W0 SoilAT2R4W0 SoilAT5R4W0 SoilBT3R4W0 SoilBT4R4W0 SoilBT1R4W0
## 1436 1952 2603 2041 1763 2150
## SoilAT3R4W0 SoilAT4R4W0 SoilBT2R4W0 SoilAT1R5W0 SoilBT1R5W0 SoilBT4R5W0
## 1096 2109 2047 1292 976 1244
## SoilAT5R5W0 SoilBT2R5W0 SoilAT4R5W0 SoilAT2R5W0 SoilAT3R5W0 SoilBT3R5W0
## 3069 2073 1591 1560 962 1736
## SoilBT5R5W0 SoilBT4R6W0 SoilAT3R6W0 SoilAT1R6W0 SoilAT4R6W0 SoilBT3R6W0
## 1963 2527 1544 1698 1465 2085
## SoilAT2R6W0 SoilBT5R6W0 SoilAT5R6W0 SoilBT1R6W0 SoilBT2R6W0 SoilAT2R1W2
## 2610 1649 1638 2401 1895 1367
## SoilAT3R1W2 SoilBT5R1W2 SoilAT5R1W2 SoilAT4R1W2 SoilBT3R1W2 SoilBT1R1W2
## 2015 1277 1066 2268 1732 1485
## SoilBT4R1W2 SoilAT1R1W2 SoilBT2R1W2 SoilAT4R2W2 SoilAT5R2W2 SoilBT2R2W2
## 1429 1154 1914 1426 1172 1977
## SoilBT1R2W2 SoilAT1R2W2 SoilBT3R2W2 SoilAT3R2W2 SoilAT2R2W2 SoilBT5R2W2
## 1433 2261 3728 990 1347 1149
## SoilBT4R2W2 SoilBT2R3W2 SoilAT3R3W2 SoilAT5R3W2 SoilBT5R3W2 SoilBT1R3W2
## 1297 1718 1235 1663 1418 1925
## SoilAT4R3W2 SoilAT1R3W2 SoilBT3R3W2 SoilBT4R3W2 SoilAT2R3W2 SoilBT5R4W2
## 1096 628 1921 1193 1222 1238
## SoilAT1R4W2 SoilAT2R4W2 SoilAT5R4W2 SoilBT3R4W2 SoilBT4R4W2 SoilBT1R4W2
## 1475 746 1251 1847 1899 1868
## SoilAT3R4W2 SoilAT4R4W2 SoilBT2R4W2 SoilAT1R5W2 SoilBT1R5W2 SoilBT4R5W2
## 1855 2010 2388 1889 1141 1963
## SoilAT5R5W2 SoilBT2R5W2 SoilAT4R5W2 SoilAT2R5W2 SoilAT3R5W2 SoilBT3R5W2
## 2214 1349 1627 2616 2016 2225
## SoilBT5R5W2 SoilBT4R6W2 SoilAT3R6W2 SoilAT1R6W2 SoilAT4R6W2 SoilBT3R6W2
## 1828 886 998 2191 1264 1957
## SoilAT2R6W2 SoilBT5R6W2 SoilAT5R6W2 SoilBT1R6W2 SoilBT2R6W2 SoilAT2R1W5
## 2566 1495 564 2130 1868 476
## SoilAT3R1W5 SoilBT5R1W5 SoilAT5R1W5 SoilAT4R1W5 SoilBT3R1W5 SoilBT1R1W5
## 748 760 616 503 953 1189
## SoilBT4R1W5 SoilAT1R1W5 SoilBT2R1W5 SoilAT4R2W5 SoilAT5R2W5 SoilBT2R2W5
## 2290 1284 2331 459 584 753
## SoilBT1R2W5 SoilAT1R2W5 SoilBT3R2W5 SoilAT3R2W5 SoilAT2R2W5 SoilBT5R2W5
## 1307 797 747 975 852 659
## SoilBT4R2W5 SoilBT2R3W5 SoilAT3R3W5 SoilAT5R3W5 SoilBT5R3W5 SoilBT1R3W5
## 976 814 658 1073 1425 1061
## SoilAT4R3W5 SoilAT1R3W5 SoilBT3R3W5 SoilBT4R3W5 SoilAT2R3W5 SoilBT5R4W5
## 1112 1609 1026 1628 1734 697
## SoilAT1R4W5 SoilAT2R4W5 SoilAT5R4W5 SoilBT3R4W5 SoilBT4R4W5 SoilBT1R4W5
## 672 486 582 949 639 1300
## SoilAT3R4W5 SoilAT4R4W5 SoilBT2R4W5 SoilAT1R5W5 SoilBT1R5W5 SoilBT4R5W5
## 575 623 508 1469 1219 1314
## SoilAT5R5W5 SoilBT2R5W5 SoilAT4R5W5 SoilAT2R5W5 SoilAT3R5W5 SoilBT3R5W5
## 1610 1633 1658 1729 739 1408
## SoilBT5R5W5 SoilBT4R6W5 SoilAT1R6W5 SoilAT4R6W5 SoilBT3R6W5 SoilAT2R6W5
## 831 799 1222 818 1832 1219
## SoilBT5R6W5 SoilAT5R6W5 SoilBT1R6W5 SoilBT2R6W5 SoilAT2R1W7 SoilAT3R1W7
## 726 793 844 1084 1659 820
## SoilBT5R1W7 SoilAT5R1W7 SoilAT4R1W7 SoilBT3R1W7 SoilBT1R1W7 SoilBT4R1W7
## 1441 1636 1701 2870 1975 1928
## SoilAT1R1W7 SoilBT2R1W7 SoilAT4R2W7 SoilAT5R2W7 SoilBT2R2W7 SoilBT1R2W7
## 1739 2306 1311 704 1230 1930
## SoilAT1R2W7 SoilBT3R2W7 SoilAT3R2W7 SoilAT2R2W7 SoilBT5R2W7 SoilBT4R2W7
## 1299 1332 2259 891 1166 3457
## SoilBT2R3W7 SoilAT3R3W7 SoilAT5R3W7 SoilBT5R3W7 SoilBT1R3W7 SoilAT4R3W7
## 1826 1153 2032 1535 2189 1402
## SoilAT1R3W7 SoilBT3R3W7 SoilBT4R3W7 SoilAT2R3W7 SoilBT5R4W7 SoilAT1R4W7
## 1728 1538 1943 2687 1772 640
## SoilAT2R4W7 SoilAT5R4W7 SoilBT3R4W7 SoilBT4R4W7 SoilBT1R4W7 SoilAT3R4W7
## 480 877 2572 1781 2137 1021
## SoilAT4R4W7 SoilBT2R4W7 SoilAT1R5W7 SoilBT1R5W7 SoilBT4R5W7 SoilAT5R5W7
## 897 2097 3071 2326 1330 917
## SoilBT2R5W7 SoilAT4R5W7 SoilAT2R5W7 SoilAT3R5W7 SoilBT3R5W7 SoilBT5R5W7
## 1773 907 1180 1363 2766 730
## SoilBT4R6W7 SoilAT3R6W7 SoilAT1R6W7 SoilAT4R6W7 SoilBT3R6W7 SoilAT2R6W7
## 831 811 2347 784 2047 776
## SoilBT5R6W7 SoilAT5R6W7 SoilBT1R6W7 SoilBT2R6W7 SoilAT2R1W9 SoilAT3R1W9
## 1014 740 747 1665 697 1212
## SoilBT5R1W9 SoilAT5R1W9 SoilAT4R1W9 SoilBT3R1W9 SoilBT1R1W9 SoilBT4R1W9
## 1470 1461 1223 1151 1148 1608
## SoilAT1R1W9 SoilBT2R1W9 SoilAT4R2W9 SoilAT5R2W9 SoilBT2R2W9 SoilBT1R2W9
## 623 1235 1634 1276 1233 1533
## SoilAT1R2W9 SoilBT3R2W9 SoilAT3R2W9 SoilAT2R2W9 SoilBT5R2W9 SoilBT4R2W9
## 1777 1527 1640 1188 1871 1604
## SoilBT2R3W9 SoilAT3R3W9 SoilAT5R3W9 SoilBT5R3W9 SoilBT1R3W9 SoilAT4R3W9
## 873 2837 1466 1445 1479 1215
## SoilAT1R3W9 SoilBT3R3W9 SoilBT4R3W9 SoilAT2R3W9 SoilBT5R4W9 SoilAT1R4W9
## 2600 2272 1865 1744 1329 384
## SoilAT2R4W9 SoilAT5R4W9 SoilBT3R4W9 SoilBT4R4W9 SoilBT1R4W9 SoilAT3R4W9
## 1693 1160 1505 1633 1684 1738
## SoilAT4R4W9 SoilBT2R4W9 SoilAT1R5W9 SoilBT1R5W9 SoilBT4R5W9 SoilAT5R5W9
## 1811 1986 1342 1445 1730 1266
## SoilBT2R5W9 SoilAT4R5W9 SoilAT2R5W9 SoilAT3R5W9 SoilBT3R5W9 SoilBT5R5W9
## 3322 2445 1197 957 2518 1903
## SoilBT4R6W9 SoilAT3R6W9 SoilAT1R6W9 SoilAT4R6W9 SoilBT3R6W9 SoilAT2R6W9
## 1024 1199 1314 830 1273 1402
## SoilBT5R6W9 SoilAT5R6W9 SoilBT1R6W9 SoilBT2R6W9
## 1141 1040 802 2868
norm.fungi <- metagenomeSeq::MRcounts(MGS, norm = T)
norm.fungi.OTU <- phyloseq::otu_table(norm.fungi, taxa_are_rows = TRUE)
fungi.css.norm <- phyloseq::phyloseq(norm.fungi.OTU, TAX.fungi.unite, FASTA.fungi, SAMP.fungi)
## fungi CSS NORM RDS
#SAVE the fungi phyloseq object as an RDS file to load faster in future.
# Save an object to a file
saveRDS(fungi.css.norm, file = "Fungi_peanut_soil_CSS_041723.rds")
# Restore the object
fungi.css.norm <- readRDS(file = "Fungi_peanut_soil_CSS_041723.rds")
# Beta diversity
fungi.dist.bray = phyloseq::distance(fungi.css.norm, "bray") # create bray-curtis distance matrix
fungi.ord <- ordinate(fungi.css.norm, "PCoA", "bray")
global.nmds <- plot_ordination(fungi.css.norm, ordination = fungi.ord, type = "samples")
global.nmds.data <- global.nmds$data
#takes a long time to run
adonis2(fungi.dist.bray~Soil*as.factor(week)*as.factor(Treatment), as(sample_data(fungi.css.norm), "data.frame"), permutations = 9999)
#beta diversity plot: Principal coordinate chart
ggplot() +
geom_point(data = global.nmds.data, aes(x = Axis.1, y = Axis.2, shape = as.factor(Treatment), fill = as.factor(week)), alpha = 0.8, size = 2) +
theme_bw() +
ylab("PCoA2") +
xlab("PCoA1") +
scale_fill_manual(values=cbbPalette) +
stat_ellipse(data = global.nmds.data, aes(x = Axis.1, y = Axis.2, group = Soil), type = "norm", linetype = 2) +
scale_shape_manual(values=c(21, 22, 23, 24, 25)) +
guides(fill=guide_legend(override.aes=list(shape=21)))
Some work with Aspergillus flavus: -we sub-set for this specie to obtain richness that is the number of A. flavus in the soil we sampled and we plot it (basically compare) -Shannon test -Simpson test result: phyloseq-class experiment-level object otu_table() OTU Table: [ 5 taxa and 298 samples ] sample_data() Sample Data: [ 298 samples by 23 sample variables ] tax_table() Taxonomy Table: [ 5 taxa by 8 taxonomic ranks ] refseq() DNAStringSet: [ 5 reference sequences ]
#sub-set A. flavus
aspergillus <- fungi.no.norm %>% subset_taxa(Species == "Aspergillus_flavus")
#compare aspergillus by Alpha diversity = richness (# of species in area)
# alpha diversity is the mean species diversity in a site at a local scale#
fungi.no.norm@sam_data$shannon <- estimate_richness(fungi.no.norm, measures=c("Shannon"))$Shannon
fungi.no.norm@sam_data$chao <- estimate_richness(fungi.no.norm, measures=c("Chao1"))$Chao1
fungi.no.norm@sam_data$invsimpson <- estimate_richness(fungi.no.norm, measures=c("InvSimpson"))$InvSimpson
fungi.no.norm@sam_data$sequence_depth <- as.numeric(sample_sums(fungi.no.norm))
fungi.no.norm@sam_data$richness <- estimate_richness(fungi.no.norm, measures=c("Observed"))$Observed
fungi.no.norm@sam_data$even <- fungi.no.norm@sam_data$shannon/log(fungi.no.norm@sam_data$richness)
#Relative abundance Aspergillus flavus
A.flavusrelativeabundance <- fungi.no.norm %>%
transform_sample_counts(function(x) x / sum(x) ) %>%
psmelt()%>%
subset(Species == "Aspergillus_flavus")
## Warning in psmelt(.): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(.): The rank names:
## OTU
## have been renamed to:
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
ggplot(A.flavusrelativeabundance[A.flavusrelativeabundance$Abundance < 0.04,], aes(x = as.factor(week), y = Abundance, color = as.factor(Treatment))) +
stat_summary(fun.y=mean,geom="line", aes(group = Treatment)) +
stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
#geom_jitter()+
theme_classic() +
ylab("Relative Abundance (%)") +
xlab("") +
scale_color_manual(values = cbbPalette) +
scale_y_continuous(labels = scales::percent) +
labs(fill = "Treatment") +
theme(legend.text = element_text(size = 10),
legend.key = element_blank(),
legend.title = element_text(size = 10),
legend.position = "right",
strip.text.x = element_text(size = 10, vjust=2),
panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
facet_wrap(~Soil, scales = "free")
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
ggplot(A.flavusrelativeabundance, aes(x = as.factor(week), y = Abundance, fill = as.factor(Treatment))) +
#facet_wrap(~week, nrow = 1, scales = "free_x", strip.position="bottom") +
geom_bar(stat = "identity", alpha = 0.9) +
theme_minimal() +
ylab("Relative Abundance (%)") +
xlab("") +
scale_fill_manual(values = sample(fungi.colors)) +
scale_y_continuous(labels = scales::percent) +
labs(fill = "Treatment") +
theme(axis.text.x = element_blank(),
legend.text = element_text(size = 10),
legend.key = element_blank(),
legend.title = element_text(size = 10),
legend.position = "right",
strip.text.x = element_text(size = 10, vjust=2),
panel.grid.major = element_blank(), panel.grid.minor = element_blank())
We filter for soil A and soil B. Verify they have the same 20 prevalent fungi (is it weird knowing the soils were really different in the Principal coordinate chart)
set.seed(12348)
topx.fungi <- top_taxa(fungi.no.norm, n = 20)
fung.composition <- fungi.no.norm %>%
subset_taxa(OTU %in% topx.fungi) %>%
microbiome::transform("compositional") %>%
psmelt() %>%
group_by(Treatment, Soil, Label) %>%
summarise(MeanRelAbund = mean(Abundance)) %>%
left_join(as.data.frame(tax_table(fungi.no.norm), by = "Label")) %>%
ggplot(aes(Treatment, MeanRelAbund, fill = Label)) +
geom_bar(stat = "identity") +
theme_classic() +
scale_fill_manual(values= c(cbbPalette, fungi.colors)) +
scale_y_continuous(labels = scales::percent) +
labs(x = "", y = "Relative abundance (%)", title = "Fungi") +
theme(axis.text.x = element_text(angle=45, hjust=1),
legend.text = element_text(face = "italic", size = 5),
legend.title = element_blank(),
legend.key.size = unit(0.3, 'cm'))
## Warning in psmelt(.): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(.): The rank names:
## OTU
## have been renamed to:
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
## `summarise()` has grouped output by 'Treatment', 'Soil'. You can override using the `.groups` argument.
## Joining, by = "Label"
#facet_wrap(~Soil, nrow = 1)
fung.composition
This is to verify if running them separated would change the OTUs
fungi.no.norm.A <- fungi.no.norm %>%
subset_samples(Soil == "A") %>%
phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE)
#soil A
set.seed(12348)
topx.fungi.A <- top_taxa(fungi.no.norm.A, n = 20)
fungi.composition.A <- fungi.no.norm %>%
subset_taxa(OTU %in% topx.fungi) %>%
microbiome::transform("compositional") %>%
psmelt() %>%
group_by(Treatment, Label) %>%
summarise(MeanRelAbund = mean(Abundance)) %>%
left_join(as.data.frame(tax_table(fungi.no.norm), by = "Label")) %>%
ggplot(aes(Treatment, MeanRelAbund, fill = Label)) +
geom_bar(stat = "identity") +
theme_classic() +
scale_fill_manual(values= c(cbbPalette, fungi.colors)) +
scale_y_continuous(labels = scales::percent) +
labs(x = "", y = "Relative abundance (%)", title = "Fungi") +
theme(axis.text.x = element_text(angle=45, hjust=1),
legend.text = element_text(face = "italic", size = 5),
legend.title = element_blank(),
legend.key.size = unit(0.3, 'cm'))
## Warning in psmelt(.): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(.): The rank names:
## OTU
## have been renamed to:
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
## `summarise()` has grouped output by 'Treatment'. You can override using the `.groups` argument.
## Joining, by = "Label"
#facet_wrap(~Soil, nrow = 1)
fungi.composition.A
fungi.no.norm.B <- fungi.no.norm %>%
subset_samples(Soil == "B") %>%
phyloseq::filter_taxa(function(x) sum(x) > 0, TRUE)
#Filter for soil B
set.seed(12348)
topx.fungi.B <- top_taxa(fungi.no.norm.B, n = 20)
fungi.composition.B <- fungi.no.norm %>%
subset_taxa(OTU %in% topx.fungi) %>%
microbiome::transform("compositional") %>%
psmelt() %>%
#filter(Soil == "A") %>%
group_by(Treatment, Label) %>%
summarise(MeanRelAbund = mean(Abundance)) %>%
left_join(as.data.frame(tax_table(fungi.no.norm), by = "Label")) %>%
ggplot(aes(Treatment, MeanRelAbund, fill = Label)) +
geom_bar(stat = "identity") +
theme_classic() +
scale_fill_manual(values= c(cbbPalette, fungi.colors)) +
scale_y_continuous(labels = scales::percent) +
labs(x = "", y = "Relative abundance (%)", title = "Fungi") +
theme(axis.text.x = element_text(angle=45, hjust=1),
legend.text = element_text(face = "italic", size = 5),
legend.title = element_blank(),
legend.key.size = unit(0.3, 'cm'))
## Warning in psmelt(.): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(.): The rank names:
## OTU
## have been renamed to:
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
## `summarise()` has grouped output by 'Treatment'. You can override using the `.groups` argument.
## Joining, by = "Label"
#facet_wrap(~Soil, nrow = 1)
fungi.composition.B
ggarrange(fungi.composition.A, fungi.composition.B, common.legend = T, labels = c("A", "B"))
#Relative abundance for Mortierella
#the fungi.no.norm is a phyloseq object, we need to transformed the data to relative abundance(because that's what we want for our project),
#then transform it to a data frame using psmelt(), with this, then, you can subset to A. flavus using subset(Species == "Aspergillus_flavus")
#1. you can not use the fungi.no.norm bc is not normalized, before transforming it to a data frame, do relative abundance.
mortierellaabundance <- fungi.no.norm %>%
#transforme into the relative abundance
transform_sample_counts(function(x) x / sum(x) ) %>%
#now you have relative abundance! but still in phyloseq, so, transform by usinh psmelt(). The output will be a data frame.
#psmelt transform phyloseq to data frame
psmelt()%>%
#sub-set to aspergillus
subset(Family == "Mortierellaceae")
## Warning in psmelt(.): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
## Warning in psmelt(.): The rank names:
## OTU
## have been renamed to:
## taxa_OTU
## to avoid conflicts with special phyloseq plot attribute names.
#line chart
ggplot(mortierellaabundance[mortierellaabundance$Abundance < 0.04,], aes(x = as.factor(week), y = Abundance, color = as.factor(Treatment))) +
stat_summary(fun.y=mean,geom="line", aes(group = Treatment)) +
stat_summary(fun.data = mean_se, geom = "errorbar", width = 0.2) +
#geom_jitter()+
theme_classic() +
ylab("Relative Abundance (%)") +
xlab("") +
scale_color_manual(values = cbbPalette) +
scale_y_continuous(labels = scales::percent) +
labs(fill = "Treatment") +
theme(legend.text = element_text(size = 10),
legend.key = element_blank(),
legend.title = element_text(size = 10),
legend.position = "right",
strip.text.x = element_text(size = 10, vjust=2),
panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
facet_wrap(~Soil, scales = "free")
# Perform indicator species analysis just considering the original groups
set.seed(12348)
indicator.management <- indicspecies::multipatt(as.data.frame(t(fungi.css.norm@otu_table)), cluster = fungi.css.norm@sam_data$Treatment, max.order = 1)
# summary of results
summary(indicator.management, indvalcomp = TRUE)
##
## Multilevel pattern analysis
## ---------------------------
##
## Association function: IndVal.g
## Significance level (alpha): 0.05
##
## Total number of species: 3088
## Selected number of species: 87
## Number of species associated to 1 group: 87
## Number of species associated to 2 groups: 0
## Number of species associated to 3 groups: 0
## Number of species associated to 4 groups: 0
##
## List of species associated to each combination:
##
## Group 1 #sps. 20
## A B stat p.value
## FOTU_25 0.39273 0.96667 0.616 0.005 **
## FOTU_789 0.37705 0.96667 0.604 0.005 **
## FOTU_314 0.64079 0.41667 0.517 0.010 **
## FOTU_4755 0.29504 0.58333 0.415 0.015 *
## FOTU_580 0.30176 0.56667 0.414 0.025 *
## FOTU_5030 0.28745 0.50000 0.379 0.050 *
## FOTU_690 0.46742 0.28333 0.364 0.025 *
## FOTU_942 0.68503 0.16667 0.338 0.040 *
## FOTU_835 0.36723 0.28333 0.323 0.030 *
## FOTU_1247 0.41580 0.23333 0.311 0.030 *
## FOTU_1071 0.45510 0.20000 0.302 0.035 *
## FOTU_1620 0.84811 0.08333 0.266 0.010 **
## FOTU_554 0.68383 0.10000 0.262 0.025 *
## FOTU_5516 0.97034 0.06667 0.254 0.040 *
## FOTU_1404 0.76868 0.08333 0.253 0.005 **
## FOTU_2279 0.62090 0.08333 0.227 0.040 *
## FOTU_1661 1.00000 0.05000 0.224 0.045 *
## FOTU_1927 1.00000 0.05000 0.224 0.025 *
## FOTU_2576 1.00000 0.05000 0.224 0.050 *
## FOTU_3633 0.87866 0.05000 0.210 0.040 *
##
## Group 2 #sps. 15
## A B stat p.value
## FOTU_132 0.31032 0.61667 0.437 0.035 *
## FOTU_3590 0.85175 0.15000 0.357 0.005 **
## FOTU_2046 0.50300 0.20000 0.317 0.005 **
## FOTU_2191 0.50260 0.20000 0.317 0.005 **
## FOTU_1285 0.40682 0.21667 0.297 0.050 *
## FOTU_4901 0.93891 0.08333 0.280 0.025 *
## FOTU_1219 0.77966 0.08333 0.255 0.010 **
## FOTU_1288 0.93040 0.06667 0.249 0.045 *
## FOTU_1094 0.58148 0.10000 0.241 0.045 *
## FOTU_5764 0.57903 0.10000 0.241 0.045 *
## FOTU_3427 0.85818 0.06667 0.239 0.025 *
## FOTU_3044 0.75190 0.06667 0.224 0.040 *
## FOTU_2575 1.00000 0.05000 0.224 0.030 *
## FOTU_4494 0.68777 0.06667 0.214 0.025 *
## FOTU_3673 0.80987 0.05000 0.201 0.025 *
##
## Group 3 #sps. 18
## A B stat p.value
## FOTU_23 0.74437 0.55932 0.645 0.015 *
## FOTU_793 0.73724 0.52542 0.622 0.005 **
## FOTU_1365 0.51918 0.66102 0.586 0.045 *
## FOTU_4235 0.62591 0.40678 0.505 0.015 *
## FOTU_442 0.47203 0.33898 0.400 0.025 *
## FOTU_930 0.35251 0.38983 0.371 0.015 *
## FOTU_1148 0.39896 0.30508 0.349 0.005 **
## FOTU_3555 0.64358 0.11864 0.276 0.010 **
## FOTU_1291 0.60201 0.11864 0.267 0.040 *
## FOTU_1556 1.00000 0.06780 0.260 0.005 **
## FOTU_4605 1.00000 0.06780 0.260 0.005 **
## FOTU_1030 0.64512 0.10169 0.256 0.005 **
## FOTU_5023 0.94949 0.06780 0.254 0.015 *
## FOTU_2393 0.87570 0.06780 0.244 0.015 *
## FOTU_2433 0.86066 0.06780 0.242 0.005 **
## FOTU_2658 0.52599 0.10169 0.231 0.045 *
## FOTU_459 1.00000 0.05085 0.225 0.010 **
## FOTU_4175 0.63309 0.06780 0.207 0.020 *
##
## Group 4 #sps. 12
## A B stat p.value
## FOTU_829 0.39916 0.72881 0.539 0.035 *
## FOTU_940 0.77158 0.13559 0.323 0.005 **
## FOTU_4311 0.82342 0.11864 0.313 0.035 *
## FOTU_409 0.80693 0.11864 0.309 0.045 *
## FOTU_3634 0.63843 0.10169 0.255 0.025 *
## FOTU_4719 0.52891 0.11864 0.251 0.030 *
## FOTU_2663 0.58529 0.10169 0.244 0.015 *
## FOTU_4082 0.60744 0.08475 0.227 0.025 *
## FOTU_5149 1.00000 0.05085 0.225 0.010 **
## FOTU_484 0.95762 0.05085 0.221 0.040 *
## FOTU_4568 0.89297 0.05085 0.213 0.045 *
## FOTU_2815 0.87005 0.05085 0.210 0.025 *
##
## Group 5 #sps. 22
## A B stat p.value
## FOTU_3095 0.29708 0.98333 0.540 0.045 *
## FOTU_756 0.49238 0.51667 0.504 0.015 *
## FOTU_615 0.54071 0.38333 0.455 0.005 **
## FOTU_387 0.58232 0.35000 0.451 0.015 *
## FOTU_1097 0.39511 0.45000 0.422 0.005 **
## FOTU_675 0.76954 0.21667 0.408 0.015 *
## FOTU_5042 0.76752 0.21667 0.408 0.015 *
## FOTU_535 0.35605 0.45000 0.400 0.035 *
## FOTU_656 0.30699 0.45000 0.372 0.045 *
## FOTU_1372 0.50904 0.26667 0.368 0.015 *
## FOTU_663 0.76694 0.16667 0.358 0.030 *
## FOTU_3137 0.54768 0.20000 0.331 0.005 **
## FOTU_1753 0.60186 0.16667 0.317 0.015 *
## FOTU_1492 0.90335 0.08333 0.274 0.020 *
## FOTU_4119 0.52046 0.13333 0.263 0.015 *
## FOTU_1571 0.75782 0.08333 0.251 0.005 **
## FOTU_1410 0.72051 0.08333 0.245 0.040 *
## FOTU_5051 0.50215 0.10000 0.224 0.050 *
## FOTU_2013 1.00000 0.05000 0.224 0.025 *
## FOTU_3777 1.00000 0.05000 0.224 0.035 *
## FOTU_2112 0.84307 0.05000 0.205 0.030 *
## FOTU_4013 0.82590 0.05000 0.203 0.030 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
unite_taxonomy$OTU <- rownames(unite_taxonomy)
unite_taxonomy[unite_taxonomy$OTU == "BOTU_1527",]
# Explore some of these taxa, what are they? What might they do?
indicator.treatment <- indicator.management$sign
indicator.treatment2 <- indicator.treatment %>%
subset(p.value < 0.01) %>%
mutate(OTU = rownames(.))
indicator.treatment3 <- left_join(indicator.treatment2, unite_taxonomy, by = "OTU")
indicator.treatment3$category <- ifelse(indicator.treatment3$index == 1, "Dry1",
ifelse(indicator.treatment3$index == 2, "Moderate2",
ifelse(indicator.treatment3$index == 3, "Moderate3",
ifelse(indicator.treatment3$index == 4, "Moderate4",
ifelse(indicator.treatment3$index == 5, "Wet5", NA)))))
indicator.treatment4 <- indicator.treatment3 %>%
count(OTU)
#indicator.treatment4$Phylum_other <- ifelse(indicator.treatment4$n < 10, "Other", indicator.treatment4$Phylum)
indicator.treatment5 <- left_join(indicator.treatment3, indicator.treatment4, by = "OTU")
q<- ggplot(indicator.treatment3, aes(x = category, fill = Label)) +
geom_bar(position = "stack") +
scale_fill_manual(values = c(cbbPalette, "purple", "brown", "grey", "pink", "red", "blue", "green", "cyan", "gold")) +
theme_classic() +
xlab("")
#facet_wrap(~Soil, scales = "free")
q + theme(axis.text.x = element_text(angle = 60, hjust = 1))
fungi.no.norm@sam_data$Sample
## [1] "SoilAT2R1W0" "SoilAT3R1W0" "SoilBT5R1W0" "SoilAT5R1W0" "SoilBT3R1W0"
## [6] "SoilBT1R1W0" "SoilBT4R1W0" "SoilAT1R1W0" "SoilBT2R1W0" "SoilAT4R2W0"
## [11] "SoilAT5R2W0" "SoilBT2R2W0" "SoilBT1R2W0" "SoilAT1R2W0" "SoilBT3R2W0"
## [16] "SoilAT3R2W0" "SoilAT2R2W0" "SoilBT5R2W0" "SoilBT4R2W0" "SoilBT2R3W0"
## [21] "SoilAT3R3W0" "SoilAT5R3W0" "SoilBT5R3W0" "SoilBT1R3W0" "SoilAT4R3W0"
## [26] "SoilAT1R3W0" "SoilBT3R3W0" "SoilBT4R3W0" "SoilAT2R3W0" "SoilBT5R4W0"
## [31] "SoilAT1R4W0" "SoilAT2R4W0" "SoilAT5R4W0" "SoilBT3R4W0" "SoilBT4R4W0"
## [36] "SoilBT1R4W0" "SoilAT3R4W0" "SoilAT4R4W0" "SoilBT2R4W0" "SoilAT1R5W0"
## [41] "SoilBT1R5W0" "SoilBT4R5W0" "SoilAT5R5W0" "SoilBT2R5W0" "SoilAT4R5W0"
## [46] "SoilAT2R5W0" "SoilAT3R5W0" "SoilBT3R5W0" "SoilBT5R5W0" "SoilBT4R6W0"
## [51] "SoilAT3R6W0" "SoilAT1R6W0" "SoilAT4R6W0" "SoilBT3R6W0" "SoilAT2R6W0"
## [56] "SoilBT5R6W0" "SoilAT5R6W0" "SoilBT1R6W0" "SoilBT2R6W0" "SoilAT2R1W2"
## [61] "SoilAT3R1W2" "SoilBT5R1W2" "SoilAT5R1W2" "SoilAT4R1W2" "SoilBT3R1W2"
## [66] "SoilBT1R1W2" "SoilBT4R1W2" "SoilAT1R1W2" "SoilBT2R1W2" "SoilAT4R2W2"
## [71] "SoilAT5R2W2" "SoilBT2R2W2" "SoilBT1R2W2" "SoilAT1R2W2" "SoilBT3R2W2"
## [76] "SoilAT3R2W2" "SoilAT2R2W2" "SoilBT5R2W2" "SoilBT4R2W2" "SoilBT2R3W2"
## [81] "SoilAT3R3W2" "SoilAT5R3W2" "SoilBT5R3W2" "SoilBT1R3W2" "SoilAT4R3W2"
## [86] "SoilAT1R3W2" "SoilBT3R3W2" "SoilBT4R3W2" "SoilAT2R3W2" "SoilBT5R4W2"
## [91] "SoilAT1R4W2" "SoilAT2R4W2" "SoilAT5R4W2" "SoilBT3R4W2" "SoilBT4R4W2"
## [96] "SoilBT1R4W2" "SoilAT3R4W2" "SoilAT4R4W2" "SoilBT2R4W2" "SoilAT1R5W2"
## [101] "SoilBT1R5W2" "SoilBT4R5W2" "SoilAT5R5W2" "SoilBT2R5W2" "SoilAT4R5W2"
## [106] "SoilAT2R5W2" "SoilAT3R5W2" "SoilBT3R5W2" "SoilBT5R5W2" "SoilBT4R6W2"
## [111] "SoilAT3R6W2" "SoilAT1R6W2" "SoilAT4R6W2" "SoilBT3R6W2" "SoilAT2R6W2"
## [116] "SoilBT5R6W2" "SoilAT5R6W2" "SoilBT1R6W2" "SoilBT2R6W2" "SoilAT2R1W5"
## [121] "SoilAT3R1W5" "SoilBT5R1W5" "SoilAT5R1W5" "SoilAT4R1W5" "SoilBT3R1W5"
## [126] "SoilBT1R1W5" "SoilBT4R1W5" "SoilAT1R1W5" "SoilBT2R1W5" "SoilAT4R2W5"
## [131] "SoilAT5R2W5" "SoilBT2R2W5" "SoilBT1R2W5" "SoilAT1R2W5" "SoilBT3R2W5"
## [136] "SoilAT3R2W5" "SoilAT2R2W5" "SoilBT5R2W5" "SoilBT4R2W5" "SoilBT2R3W5"
## [141] "SoilAT3R3W5" "SoilAT5R3W5" "SoilBT5R3W5" "SoilBT1R3W5" "SoilAT4R3W5"
## [146] "SoilAT1R3W5" "SoilBT3R3W5" "SoilBT4R3W5" "SoilAT2R3W5" "SoilBT5R4W5"
## [151] "SoilAT1R4W5" "SoilAT2R4W5" "SoilAT5R4W5" "SoilBT3R4W5" "SoilBT4R4W5"
## [156] "SoilBT1R4W5" "SoilAT3R4W5" "SoilAT4R4W5" "SoilBT2R4W5" "SoilAT1R5W5"
## [161] "SoilBT1R5W5" "SoilBT4R5W5" "SoilAT5R5W5" "SoilBT2R5W5" "SoilAT4R5W5"
## [166] "SoilAT2R5W5" "SoilAT3R5W5" "SoilBT3R5W5" "SoilBT5R5W5" "SoilBT4R6W5"
## [171] "SoilAT1R6W5" "SoilAT4R6W5" "SoilBT3R6W5" "SoilAT2R6W5" "SoilBT5R6W5"
## [176] "SoilAT5R6W5" "SoilBT1R6W5" "SoilBT2R6W5" "SoilAT2R1W7" "SoilAT3R1W7"
## [181] "SoilBT5R1W7" "SoilAT5R1W7" "SoilAT4R1W7" "SoilBT3R1W7" "SoilBT1R1W7"
## [186] "SoilBT4R1W7" "SoilAT1R1W7" "SoilBT2R1W7" "SoilAT4R2W7" "SoilAT5R2W7"
## [191] "SoilBT2R2W7" "SoilBT1R2W7" "SoilAT1R2W7" "SoilBT3R2W7" "SoilAT3R2W7"
## [196] "SoilAT2R2W7" "SoilBT5R2W7" "SoilBT4R2W7" "SoilBT2R3W7" "SoilAT3R3W7"
## [201] "SoilAT5R3W7" "SoilBT5R3W7" "SoilBT1R3W7" "SoilAT4R3W7" "SoilAT1R3W7"
## [206] "SoilBT3R3W7" "SoilBT4R3W7" "SoilAT2R3W7" "SoilBT5R4W7" "SoilAT1R4W7"
## [211] "SoilAT2R4W7" "SoilAT5R4W7" "SoilBT3R4W7" "SoilBT4R4W7" "SoilBT1R4W7"
## [216] "SoilAT3R4W7" "SoilAT4R4W7" "SoilBT2R4W7" "SoilAT1R5W7" "SoilBT1R5W7"
## [221] "SoilBT4R5W7" "SoilAT5R5W7" "SoilBT2R5W7" "SoilAT4R5W7" "SoilAT2R5W7"
## [226] "SoilAT3R5W7" "SoilBT3R5W7" "SoilBT5R5W7" "SoilBT4R6W7" "SoilAT3R6W7"
## [231] "SoilAT1R6W7" "SoilAT4R6W7" "SoilBT3R6W7" "SoilAT2R6W7" "SoilBT5R6W7"
## [236] "SoilAT5R6W7" "SoilBT1R6W7" "SoilBT2R6W7" "SoilAT2R1W9" "SoilAT3R1W9"
## [241] "SoilBT5R1W9" "SoilAT5R1W9" "SoilAT4R1W9" "SoilBT3R1W9" "SoilBT1R1W9"
## [246] "SoilBT4R1W9" "SoilAT1R1W9" "SoilBT2R1W9" "SoilAT4R2W9" "SoilAT5R2W9"
## [251] "SoilBT2R2W9" "SoilBT1R2W9" "SoilAT1R2W9" "SoilBT3R2W9" "SoilAT3R2W9"
## [256] "SoilAT2R2W9" "SoilBT5R2W9" "SoilBT4R2W9" "SoilBT2R3W9" "SoilAT3R3W9"
## [261] "SoilAT5R3W9" "SoilBT5R3W9" "SoilBT1R3W9" "SoilAT4R3W9" "SoilAT1R3W9"
## [266] "SoilBT3R3W9" "SoilBT4R3W9" "SoilAT2R3W9" "SoilBT5R4W9" "SoilAT1R4W9"
## [271] "SoilAT2R4W9" "SoilAT5R4W9" "SoilBT3R4W9" "SoilBT4R4W9" "SoilBT1R4W9"
## [276] "SoilAT3R4W9" "SoilAT4R4W9" "SoilBT2R4W9" "SoilAT1R5W9" "SoilBT1R5W9"
## [281] "SoilBT4R5W9" "SoilAT5R5W9" "SoilBT2R5W9" "SoilAT4R5W9" "SoilAT2R5W9"
## [286] "SoilAT3R5W9" "SoilBT3R5W9" "SoilBT5R5W9" "SoilBT4R6W9" "SoilAT3R6W9"
## [291] "SoilAT1R6W9" "SoilAT4R6W9" "SoilBT3R6W9" "SoilAT2R6W9" "SoilBT5R6W9"
## [296] "SoilAT5R6W9" "SoilBT1R6W9" "SoilBT2R6W9"
map <- fungi.no.norm@sam_data %>%
as("data.frame")
# Core - abundance occupancy modeling- Peanut
core.prioritizing <- function(phyloseq.object){
set.seed(19)
rare.phyloseq.object <- rarefy_even_depth(phyloseq.object, replace=TRUE)
nReads=sample_sums(rare.phyloseq.object)[[1]] # input dataset needs to be rarified and the rarifaction depth included
otu <- rare.phyloseq.object@otu_table %>%
as("matrix")
map <- rare.phyloseq.object@sam_data %>%
as("data.frame")
otu_PA <- 1*((otu>0)==1) # presence-absence data
otu_occ <- rowSums(otu_PA)/ncol(otu_PA) # occupancy calculation
otu_rel <- apply(decostand(otu, method="total", MARGIN=2),1, mean) # mean relative abundance
occ_abun <- add_rownames(as.data.frame(cbind(otu_occ, otu_rel)),'otu') # combining occupancy and abundance data frame
# Ranking OTUs based on their occupancy
# For caluclating raking index we included following conditions:
# - time-specific occupancy (sumF) = frequency of detection within time point (genotype or site)
# - replication consistency (sumG) = has occupancy of 1 in at least one time point (genotype or site) (1 if occupancy 1, else 0)
PresenceSum <- data.frame(otu = as.factor(row.names(otu)), otu) %>%
gather(Sample, abun, -otu) %>%
left_join(map, by = 'Sample') %>% #edit for sample id column in metadata
group_by(otu, week) %>% #edit for time point column in metadata
dplyr::summarise(time_freq=sum(abun>0)/length(abun), # frequency of detection between time points
coreTime=ifelse(time_freq == 1, 1, 0)) %>% # 1 only if occupancy 1 with specific time, 0 if not
group_by(otu) %>%
dplyr::summarise(sumF=sum(time_freq),
sumG=sum(coreTime),
nS=length(week)*2, #edit for time point column in metadata
Index=(sumF+sumG)/nS) # calculating weighting Index based on number of time points detected and
otu_ranked <- occ_abun %>%
left_join(PresenceSum, by='otu') %>%
transmute(otu=otu,
rank=Index) %>%
arrange(desc(rank))
# Calculating the contribution of ranked OTUs to the BC similarity
BCaddition <- NULL
# calculating BC dissimilarity based on the 1st ranked OTU
# with 36 samples there should be 630 combinations n!/r!
otu_start=otu_ranked$otu[1]
start_matrix <- as.matrix(otu[otu_start,])
start_matrix <- t(start_matrix)
x <- apply(combn(ncol(start_matrix), 2), 2, function(x) sum(abs(start_matrix[,x[1]]- start_matrix[,x[2]]))/(2*nReads))
x_names <- apply(combn(ncol(start_matrix), 2), 2, function(x) paste(colnames(start_matrix)[x], collapse=' - '))
df_s <- data.frame(x_names,x)
df_s$rank_count <- 1
BCaddition <- rbind(BCaddition,df_s)
# calculating BC dissimilarity based on additon of ranked OTUs from 2nd to 500th. Can be set to the entire length of OTUs in the dataset, however it might take some time if more than 5000 OTUs are included.
for(i in 2:500){
otu_add=otu_ranked$otu[i]
add_matrix <- as.matrix(otu[otu_add,])
add_matrix <- t(add_matrix)
start_matrix <- rbind(start_matrix, add_matrix)
x <- apply(combn(ncol(start_matrix), 2), 2, function(x) sum(abs(start_matrix[,x[1]]-start_matrix[,x[2]]))/(2*nReads))
#x_names <- apply(combn(ncol(start_matrix), 2), 2, function(x) paste(colnames(start_matrix)[x], collapse=' - '))
df_a <- data.frame(x_names,x)
df_a$rank_count <- i
BCaddition <- rbind.data.frame(BCaddition, df_a)
}
# calculating the BC dissimilarity of the whole dataset (not needed if the second loop is already including all OTUs)
x <- apply(combn(ncol(otu), 2), 2, function(x) sum(abs(otu[,x[1]]-otu[,x[2]]))/(2*nReads))
x_names <- apply(combn(ncol(otu), 2), 2, function(x) paste(colnames(otu)[x], collapse=' - '))
df_full <- data.frame(x_names,x)
df_full$rank_count <- length(rownames(otu))
BCfull <- rbind.data.frame(BCaddition, df_full)
BC_ranked <- BCfull %>%
group_by(rank_count) %>%
dplyr::summarise(MeanBC=mean(x)) %>% # mean Bray-Curtis dissimilarity
arrange(desc(-MeanBC)) %>%
mutate(proportionBC=MeanBC/max(MeanBC)) # proportion of the dissimilarity explained by the n number of ranked OTUs
Increase=BC_ranked$MeanBC[-1]/BC_ranked$MeanBC[-length(BC_ranked$MeanBC)]
increaseDF <- data.frame(IncreaseBC=c(0,(Increase)), rank=factor(c(1:(length(Increase)+1))))
increaseDF$rank <- as.numeric(increaseDF$rank)
BC_ranked <- left_join(BC_ranked, increaseDF, by = c("rank_count" = "rank"))
BC_ranked <- BC_ranked[-nrow(BC_ranked),]
#Creating threshold for core inclusion - last call method
#B) Final increase in BC similarity of equal or greater then 2%
lastCall <- last(as.numeric(BC_ranked$rank_count[(BC_ranked$IncreaseBC>=1.02)]))
#Creating plot of Bray-Curtis similarity
plot <- ggplot(BC_ranked[1:100,], aes(x=factor(BC_ranked$rank_count[1:100], levels=BC_ranked$rank_count[1:100]))) +
geom_point(aes(y=proportionBC)) +
theme_classic() + theme(strip.background = element_blank(),axis.text.x = element_text(size=7, angle=45)) +
geom_vline(xintercept=last(as.numeric(BC_ranked$rank_count[(BC_ranked$IncreaseBC>=1.02)])), lty=3, col='black', cex=.5) +
labs(x='ranked OTUs',y='Bray-Curtis similarity') +
annotate(geom="text", x=last(as.numeric(BC_ranked$rank[(BC_ranked$IncreaseBC>=1.02)]))+3, y=.5, label=paste("Last 2% increase (",last(as.numeric(BC_ranked$rank[(BC_ranked$IncreaseBC>=1.02)])),")",sep=''), color="black")
core.otus.CSS.mean.T1 <- otu_ranked$otu[1:lastCall]
return_list <- list(core.otus.CSS.mean.T1, plot, otu_ranked, occ_abun)
return(return_list)
}
#Takes a long time
fungi.core <- core.prioritizing(fungi.no.norm)
## You set `rngseed` to FALSE. Make sure you've set & recorded
## the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 96OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
## Warning: `add_rownames()` was deprecated in dplyr 1.0.0.
## ℹ Please use `tibble::rownames_to_column()` instead.
## `summarise()` has grouped output by 'otu'. You can override using the `.groups`
## argument.
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## Warning: Unknown or uninitialised column: `rank`.
## Unknown or uninitialised column: `rank`.
saveRDS(fungi.core, file = "fungi.no.norm.core_check_point01.11.23.rds")
#Restore the object
fungi.core <- readRDS(file = "fungi.no.norm.core_check_point01.11.23.rds")
So, if this data I am using for the core does not include the soils, how can I include the soils that is not merge bc we have subseted a lot so I don’t want to add everything here. I would like to have soils and do a ggarange of the core microbiome for both to see if anything stands out.
fungi.core[[1]]
## [1] "FOTU_1" "FOTU_1091" "FOTU_16" "FOTU_162" "FOTU_1686" "FOTU_17"
## [7] "FOTU_2" "FOTU_263" "FOTU_3" "FOTU_32" "FOTU_3358" "FOTU_4156"
## [13] "FOTU_468" "FOTU_471" "FOTU_5" "FOTU_542" "FOTU_5905" "FOTU_6"
## [19] "FOTU_67" "FOTU_794" "FOTU_886" "FOTU_993" "FOTU_1109" "FOTU_1606"
## [25] "FOTU_512" "FOTU_530" "FOTU_1418" "FOTU_757" "FOTU_4112" "FOTU_70"
## [31] "FOTU_8" "FOTU_18" "FOTU_79" "FOTU_7" "FOTU_1545" "FOTU_875"
## [37] "FOTU_1500" "FOTU_899" "FOTU_1207" "FOTU_30" "FOTU_58" "FOTU_85"
## [43] "FOTU_54" "FOTU_1360" "FOTU_63" "FOTU_187" "FOTU_10" "FOTU_1630"
## [49] "FOTU_1257" "FOTU_1909" "FOTU_1168" "FOTU_15" "FOTU_64" "FOTU_1627"
## [55] "FOTU_968" "FOTU_226" "FOTU_2048" "FOTU_5628" "FOTU_36" "FOTU_1861"
## [61] "FOTU_924" "FOTU_46" "FOTU_1139" "FOTU_1018" "FOTU_78" "FOTU_27"
## [67] "FOTU_98" "FOTU_61" "FOTU_24" "FOTU_469" "FOTU_39" "FOTU_13"
## [73] "FOTU_789" "FOTU_174" "FOTU_2001" "FOTU_959" "FOTU_25" "FOTU_45"
## [79] "FOTU_232" "FOTU_188" "FOTU_5419" "FOTU_3185" "FOTU_2134" "FOTU_2878"
## [85] "FOTU_29" "FOTU_1051" "FOTU_4154" "FOTU_3095" "FOTU_147" "FOTU_49"
## [91] "FOTU_888" "FOTU_9" "FOTU_467" "FOTU_84" "FOTU_124" "FOTU_1413"
## [97] "FOTU_1581" "FOTU_122" "FOTU_1369" "FOTU_1249" "FOTU_119" "FOTU_612"
## [103] "FOTU_983" "FOTU_1601" "FOTU_11" "FOTU_77" "FOTU_2261" "FOTU_31"
## [109] "FOTU_129" "FOTU_1619" "FOTU_190" "FOTU_791" "FOTU_4" "FOTU_2132"
## [115] "FOTU_610" "FOTU_111" "FOTU_161" "FOTU_332"
library(tyRa)
set.seed(19)
rare.phyloseq.object <- rarefy_even_depth(fungi.no.norm, replace=TRUE)
## You set `rngseed` to FALSE. Make sure you've set & recorded
## the random seed of your session for reproducibility.
## See `?set.seed`
## ...
## 96OTUs were removed because they are no longer
## present in any sample after random subsampling
## ...
nReads=sample_sums(rare.phyloseq.object)[[1]] # input dataset needs to be rarified and the rarifaction depth included
otu <- rare.phyloseq.object@otu_table %>%
as("matrix")
taxa <- rownames(otu)
map <- rare.phyloseq.object@sam_data %>%
as("data.frame")
spp.out <- tyRa::fit_sncm(spp = t(otu), pool=NULL, taxon=taxa)
## Waiting for profiling to be done...
predictions <- spp.out$predictions
predictions$otu <- rownames(predictions)
# Abundance-Occupancy
taxonomy <- fungi.no.norm@tax_table %>%
as("matrix") %>%
as_tibble() %>%
mutate(otu = rownames(fungi.no.norm@tax_table))
abund.occ3 <- left_join(taxonomy, predictions, by = "otu")
abund.occ3$core <- ifelse(abund.occ3$otu %in% fungi.core[[1]], "Core", "Not Core")
library(ggrepel)
core <- ggplot() +
geom_point(data = abund.occ3, aes(x = log10(p), y = freq, color = fit_class, shape = core), alpha = 0.8, size = 2) +
geom_line(color='black', data=abund.occ3, size=1, aes(y=abund.occ3$freq.pred, x=log10(abund.occ3$p)), alpha=.25) +
geom_line(color='black', lty='twodash', size=1, data=abund.occ3, aes(y=abund.occ3$pred.upr, x=log10(abund.occ3$p)), alpha=.25)+
geom_line(color='black', lty='twodash', size=1, data=abund.occ3, aes(y=abund.occ3$pred.lwr, x=log10(abund.occ3$p)), alpha=.25)+
labs(x="log10(Mean relative abundance)", y="Occupancy") +
theme_classic() +
scale_color_manual(values = c("#000000", "#E69F00", "#56B4E9")) +
geom_text_repel(data = abund.occ3[abund.occ3$core == "Core" & abund.occ3$fit_class == "Below prediction",],
aes(x = log10(p), y = freq, label = Label))
plot(core)
library(gganimate)
library(gganimate)
library(ggplot2)
library(gganimate)
library('gifski')
library('png')
library(gapminder)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:XVector':
##
## slice
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:Hmisc':
##
## subplot
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(transformr)
library(gapminder)
library(ggplot2)
library(gganimate)
library('gifski')
library('png')
coreInteractive <-ggplot() +
geom_point(data = abund.occ3, aes(x = log10(p), y = freq, shape = core, color = fit_class, text = paste("Species:", Species)), alpha = 0.8, size = 1) +
#geom_point(aes(shape= "Species")) +
geom_line(color='black', data=abund.occ3, size=1, aes(y=abund.occ3$freq.pred, x=log10(abund.occ3$p)), alpha=.25) +
geom_line(color='black', lty='twodash', size=1, data=abund.occ3, aes(y=abund.occ3$pred.upr, x=log10(abund.occ3$p)), alpha=.25)+
geom_line(color='black', lty='twodash', size=1, data=abund.occ3, aes(y=abund.occ3$pred.lwr, x=log10(abund.occ3$p)), alpha=.25)+
labs(x="log10(Mean relative abundance)", y="Occupancy") +
theme_classic() +
scale_color_manual(values = c("#000000", "#E69F00", "#56B4E9")) +
geom_text_repel(data = abund.occ3[abund.occ3$core == "Core" & abund.occ3$fit_class == "Below prediction",],
aes(x = log10(p), y = freq, label = Label))
ggplotly(coreInteractive)